##########################################################
# PREPARE FILES - comparisons with non-protected areas ###
##########################################################
# Warning: depending on the treatment and comparison, you may have to adapt the code in places.
# questions to bowydenbraber@gmail.com

# Read the complete base table.
cu_base_agri <- read.csv("CU00_COMPLETE.csv")

# Protected-area (PA) categories that get threshold flags. "ALL" only gets a
# proportion change (used for the non-PA flag below).
pa_types <- c("UC_X", "PI", "US_X", "TI")

# Set the increases.
# Proportion change: p_<k>_incr = p_<k>_10 - p_<k>_00
# (presumably the 2010 and 2000 proportions -- TODO confirm).
for (k in c("ALL", pa_types)) {
  cu_base_agri[, paste0("p_", k, "_incr")] <-
    cu_base_agri[, paste0("p_", k, "_10")] -
    cu_base_agri[, paste0("p_", k, "_00")]
}

# Increase flags by threshold: more than 10% (is_<k>_incr) and more than 50%
# (is_<k>_incr50).
for (k in pa_types) {
  incr <- cu_base_agri[, paste0("p_", k, "_incr")]
  cu_base_agri[, paste0("is_", k, "_incr")] <- incr > 0.1
  cu_base_agri[, paste0("is_", k, "_incr50")] <- incr > 0.5
}

# Set non-increases: overall PA proportion grew by less than 1%.
cu_base_agri$is_NOPA_incr <- cu_base_agri$p_ALL_incr < 0.01

# Set the presences at baseline (threshold 1%).
for (k in pa_types) {
  cu_base_agri[, paste0("is_", k, "_presb")] <-
    cu_base_agri[, paste0("p_", k, "_00")] > 0.01
}

# Filter further.
# NOTE(review): is_NOPA_incr is defined above as p_ALL_incr < 0.01, so this
# condition holds for every row with a non-missing p_ALL_incr; in effect it
# only drops rows where p_ALL_incr is NA. The original comment said it
# "removes slight increases under 1%" -- confirm the intended condition.
# The call is namespaced so it cannot resolve to stats::filter when dplyr is
# not attached.
cu_base_agri <- dplyr::filter(
  cu_base_agri,
  is_NOPA_incr == FALSE | p_ALL_incr < 0.01
)

# Remove instances where a PA is present at baseline but not increasing.
# The original subset `cu_D`, which is not defined anywhere in this script,
# with a mask built from cu_base_agri; the table being filtered is
# cu_base_agri itself.
cu_base_agri <- cu_base_agri[
  !(cu_base_agri$is_NOPA_incr == TRUE & cu_base_agri$p_ALL_00 > 0.01),
]

########## Only run when comparing with NON-PA:
cu_prep_nonpa <- cu_base_agri

# Optional checkpoint: write the prepared table to disk ...
# write.csv(cu_prep_nonpa, file = "CU00_PREPARED_NONPA.csv")
# ... or read it back in instead of recomputing it
# cu <- read.csv("CU00_PREPARED_NONPA.csv")

# Working copy that the treatment section filters further.
cu <- cu_prep_nonpa

# Control pool: rows whose `control` column is greater than 0. It is taken
# here, before any treatment-specific filtering is applied to `cu`.
cu.ct <- dplyr::filter(cu_prep_nonpa, control > 0)


# PREPARE COVARIATES

# Covariate column names shared by all analyses.
covariates <- c(
  "mine_presabs_before2000",
  "mun.state.num",
  "forest_mean",
  "tt_standard1",
  "d.pop.00",
  "area",
  "hhh.inc.avg.00_infl",
  "p.hhh.lit.00",
  "gini.00",
  "p.hh.bs.poor.00",
  "slope",
  "elevation",
  "flooded"
)


################################ Set treatment (for all analyses)

# `pacontrol` and `patype` are not set anywhere in this script; they must be
# defined by the user before this section runs. Fail early with a clear
# message instead of an "object not found" error further down.
if (!exists("pacontrol") || !exists("patype")) {
  stop("Define `pacontrol` and `patype` before running the treatment section.",
       call. = FALSE)
}

# Edit for each PA (here TI as example).
# All conditions are passed to a single dplyr::filter() call; rows must
# satisfy every one of them, which is equivalent to the chained filters used
# before and does not need the `%>%` pipe to be attached.
cu <- dplyr::filter(
  cu,
  # remove increases of other PAs
  p_US_X_incr < 0.0001,
  p_PI_incr < 0.0001,
  # remove baselines of other PAs
  p_US_X_00 < 0.0001,
  p_PI_00 < 0.0001,
  # remove slight TI increases between 0.01 and 0.1
  !(p_TI_incr > 0.01 & is_TI_incr == FALSE)
)

# Unless the control type is "general" or "mining", drop rows with
# propsettled above 0.01 that are NOT flagged as a TI increase.
# NOTE(review): the original comment read "remove areas where TI increases,
# but propsettled is higher than 0.01", which contradicts the
# `is_TI_incr == FALSE` condition in the code. The code is left unchanged;
# confirm which of the two is intended.
if (!(pacontrol %in% c("general", "mining"))) {
  cu <- dplyr::filter(cu, !(propsettled > 0.01 & is_TI_incr == FALSE))
}

# Choose the treatment threshold. The original assigned cu.tr twice (50%
# robustness threshold first, then 10%), so the 10% version always won when
# the script was run top to bottom. That default is kept; set the switch to
# TRUE for the robustness test.
use_robustness_threshold <- FALSE
treatment_flag <- paste0(
  "is_", patype,
  if (use_robustness_threshold) "_incr50" else "_incr"
)

# which() keeps only rows where the flag is TRUE; indexing with a logical
# vector that contains NA would insert all-NA rows into the treated set.
cu.tr <- cu[which(cu[, treatment_flag]), ]

# If expansion to after 2006: treated rows excluding those flagged as a PA
# increase whose TI proportion was already above 0.1 at `p_TI_05` and grew by
# less than 0.1 between `p_TI_05` and `p_TI_10`. This table is not used
# further in this script.
cu_after2006 <- cu.tr[
  !(cu.tr$is_NOPA_incr == FALSE &
      cu.tr$p_TI_10 - cu.tr$p_TI_05 < 0.1 &
      cu.tr$p_TI_05 > 0.1),
]



###################### FINAL PART FOR ALL ANALYSES

# Set treatment to binary (1 = treated, 0 = control) and stack both sets.
cu.tr$tr <- 1
cu.ct$tr <- 0
cu.in <- rbind(cu.tr, cu.ct)

# Numeric covariates to standardise. This is the `covariates` list without
# "mine_presabs_before2000" and "mun.state.num".
covs_forlm_numeric <- c(
  "forest_mean",
  "tt_standard1",
  "d.pop.00",
  "area",
  "hhh.inc.avg.00_infl",
  "p.hhh.lit.00",
  "gini.00",
  "p.hh.bs.poor.00",
  "slope",
  "elevation",
  "flooded"
)

# Names of the output columns that receive the scaled values. The original
# script used `covs_forlm_numeric_scaled` without defining it anywhere. A
# definition already present in the session is respected; otherwise default
# to "<name>_scaled".
# NOTE(review): the "_scaled" suffix is an assumption -- confirm against the
# downstream matching/regression scripts.
if (!exists("covs_forlm_numeric_scaled")) {
  covs_forlm_numeric_scaled <- paste0(covs_forlm_numeric, "_scaled")
}
stopifnot(length(covs_forlm_numeric_scaled) == length(covs_forlm_numeric))

# scale() centres and scales each column and returns a one-column matrix;
# c() drops the matrix attributes so a plain numeric vector is stored.
cu.in[covs_forlm_numeric_scaled] <- lapply(
  cu.in[covs_forlm_numeric],
  function(x) c(scale(x))
)

# Write the table used for matching.
write.csv(cu.in, "CU00_PREPARED_FOR_MATCHING.csv")
